﻿************** DATA PREPARATION IN SPSS ***********
* This file shows the syntax accompanying the steps followed to prepare the data. 
* These steps can be found in the file "Data retrieval and preparation.docx"

* The syntax is based on IBM SPSS Statistics version 21.

* Step 2.
* Flag persons who have no answer on any of the 45 items of waves 1 to 5 (item sets cp08a to cp12e, 9 items each).
* These are persons added in the last wave: Wave6ad = 1 when all 45 items are missing, otherwise 0.
COMPUTE Wave6ad = (NMISS(
    cp08a001, cp08a002, cp08a003, cp08a004, cp08a005, cp08a006, cp08a007, cp08a008, cp08a009,
    cp09b001, cp09b002, cp09b003, cp09b004, cp09b005, cp09b006, cp09b007, cp09b008, cp09b009,
    cp10c001, cp10c002, cp10c003, cp10c004, cp10c005, cp10c006, cp10c007, cp10c008, cp10c009,
    cp11d001, cp11d002, cp11d003, cp11d004, cp11d005, cp11d006, cp11d007, cp11d008, cp11d009,
    cp12e001, cp12e002, cp12e003, cp12e004, cp12e005, cp12e006, cp12e007, cp12e008, cp12e009) = 45).
EXECUTE.

* Remove the persons added in the last wave, keeping only cases with at least one answer in waves 1 to 5.
FILTER OFF.
USE ALL.
SELECT IF (Wave6ad = 0).
EXECUTE.

* Step 3.
* Reverse-score the negatively worded item 6 in every wave on its 1-7 scale.
* The mapping swaps 1 and 7, 2 and 6, 3 and 5. The midpoint 4 and any other value are left unchanged.
* Running this step twice restores the original scores, so apply it exactly once.
RECODE cp08a006 cp09b006 cp10c006 cp11d006 cp12e006 cp13f006
    (7=1) (6=2) (5=3) (3=5) (2=6) (1=7).
EXECUTE.

* Step 4.
* Replace system-missing answers with the code 999 on all 54 survey attitude items (9 items in each of 6 waves).
* NOTE(review): no MISSING VALUES declaration for 999 is visible in this file, so SPSS treats 999 as a valid score.
* NOTE(review): confirm that later SPSS computations on these items exclude 999.
RECODE
    cp08a001 cp08a002 cp08a003 cp08a004 cp08a005 cp08a006 cp08a007 cp08a008 cp08a009
    cp09b001 cp09b002 cp09b003 cp09b004 cp09b005 cp09b006 cp09b007 cp09b008 cp09b009
    cp10c001 cp10c002 cp10c003 cp10c004 cp10c005 cp10c006 cp10c007 cp10c008 cp10c009
    cp11d001 cp11d002 cp11d003 cp11d004 cp11d005 cp11d006 cp11d007 cp11d008 cp11d009
    cp12e001 cp12e002 cp12e003 cp12e004 cp12e005 cp12e006 cp12e007 cp12e008 cp12e009
    cp13f001 cp13f002 cp13f003 cp13f004 cp13f005 cp13f006 cp13f007 cp13f008 cp13f009
    (SYSMIS=999).
EXECUTE.

* Step 5.
* Work on the dataset with household indicators (DataSet1).
* Identify duplicate cases: sort ascending by person and wave, then flag the first record of each nomem_encr.
* PrimaryFirst is 1 for that first record (primary case) and 0 for every later record of the same person.
DATASET ACTIVATE DataSet1.
SORT CASES BY nomem_encr (A) wave (A).
MATCH FILES /FILE=* /BY nomem_encr /FIRST=PrimaryFirst.
VARIABLE LEVEL PrimaryFirst (ORDINAL).
VALUE LABELS PrimaryFirst 1 'Primary Case' 0 'Duplicate Case'.
VARIABLE LABELS PrimaryFirst 'Indicator of each first matching case as Primary'.
FREQUENCIES VARIABLES=PrimaryFirst.
EXECUTE.

* Keep only the primary cases, that is one record per person.
FILTER OFF.
USE ALL.
SELECT IF (PrimaryFirst = 1).
EXECUTE.

* Step 6.
* Merge the household indicator file and the survey attitude file on the person key nomem_encr.
* DataSet2 (alias t0) is the file containing the Survey Attitude data and supplies the 54 item variables.
* The active dataset (alias t1) is the household indicator file and supplies wave, nohouse_encr and PrimaryFirst.
* The merged result replaces the active dataset.
* NOTE(review): per the STAR JOIN documentation every case of the FROM file is kept even without a match in t1 - confirm this is intended.
STAR JOIN
  /SELECT
    t1.wave, t1.nohouse_encr, t1.PrimaryFirst,
    t0.cp08a001, t0.cp08a002, t0.cp08a003, t0.cp08a004, t0.cp08a005, t0.cp08a006, t0.cp08a007, t0.cp08a008, t0.cp08a009,
    t0.cp09b001, t0.cp09b002, t0.cp09b003, t0.cp09b004, t0.cp09b005, t0.cp09b006, t0.cp09b007, t0.cp09b008, t0.cp09b009,
    t0.cp10c001, t0.cp10c002, t0.cp10c003, t0.cp10c004, t0.cp10c005, t0.cp10c006, t0.cp10c007, t0.cp10c008, t0.cp10c009,
    t0.cp11d001, t0.cp11d002, t0.cp11d003, t0.cp11d004, t0.cp11d005, t0.cp11d006, t0.cp11d007, t0.cp11d008, t0.cp11d009,
    t0.cp12e001, t0.cp12e002, t0.cp12e003, t0.cp12e004, t0.cp12e005, t0.cp12e006, t0.cp12e007, t0.cp12e008, t0.cp12e009,
    t0.cp13f001, t0.cp13f002, t0.cp13f003, t0.cp13f004, t0.cp13f005, t0.cp13f006, t0.cp13f007, t0.cp13f008, t0.cp13f009
  /FROM 'DataSet2' AS t0
  /JOIN * AS t1
    ON t0.nomem_encr=t1.nomem_encr
  /OUTFILE FILE=*.

* Step 8.
* Creating a long format: the six wave-specific columns of each item become six records per person.
* Each MAKE subcommand stacks one item across the item sets cp08a, cp09b, cp10c, cp11d, cp12e and cp13f into y001 to y009.
* Index1 takes the values 1 to 6 in that same order and identifies the item set (wave) a record comes from.
* Only nomem_encr, wave and nohouse_encr are carried along as fixed variables. Other variables are not kept.
* NULL=KEEP retains a record even when all nine stacked items are system-missing or blank.
* NOTE(review): the kept variable wave comes from the household file merged in Step 6. Index1 is the measurement occasion - confirm wave is not mistaken for it downstream.
VARSTOCASES
  /MAKE y001 FROM cp08a001 cp09b001 cp10c001 cp11d001 cp12e001 cp13f001
  /MAKE y002 FROM cp08a002 cp09b002 cp10c002 cp11d002 cp12e002 cp13f002
  /MAKE y003 FROM cp08a003 cp09b003 cp10c003 cp11d003 cp12e003 cp13f003
  /MAKE y004 FROM cp08a004 cp09b004 cp10c004 cp11d004 cp12e004 cp13f004
  /MAKE y005 FROM cp08a005 cp09b005 cp10c005 cp11d005 cp12e005 cp13f005
  /MAKE y006 FROM cp08a006 cp09b006 cp10c006 cp11d006 cp12e006 cp13f006
  /MAKE y007 FROM cp08a007 cp09b007 cp10c007 cp11d007 cp12e007 cp13f007
  /MAKE y008 FROM cp08a008 cp09b008 cp10c008 cp11d008 cp12e008 cp13f008
  /MAKE y009 FROM cp08a009 cp09b009 cp10c009 cp11d009 cp12e009 cp13f009
  /INDEX=Index1(6) 
  /KEEP=nomem_encr wave nohouse_encr
  /NULL=KEEP.

* Step 10.
* Compute subconstruct means per wave: JOY from items 001-003, VAL from items 004-006, BUR from items 007-009.
* Suffix 1 to 6 corresponds to the item sets cp08a, cp09b, cp10c, cp11d, cp12e and cp13f.
* Step 4 stores missing answers as the ordinary number 999, and MEAN averages every valid value it is given.
* Without a declaration, the scores 999, 3 and 4 would average 335.33 instead of 3.5.
* Declaring 999 as user-missing first makes MEAN use only the answered items.
* A mean is system-missing only when all three of its items are missing.
* NOTE(review): MISSING VALUES replaces any user-missing codes declared earlier on these items - confirm none are needed.
* NOTE(review): if downstream software expects 999 for missing means, recode SYSMIS to 999 on the new variables afterwards.
MISSING VALUES
    cp08a001 cp08a002 cp08a003 cp08a004 cp08a005 cp08a006 cp08a007 cp08a008 cp08a009
    cp09b001 cp09b002 cp09b003 cp09b004 cp09b005 cp09b006 cp09b007 cp09b008 cp09b009
    cp10c001 cp10c002 cp10c003 cp10c004 cp10c005 cp10c006 cp10c007 cp10c008 cp10c009
    cp11d001 cp11d002 cp11d003 cp11d004 cp11d005 cp11d006 cp11d007 cp11d008 cp11d009
    cp12e001 cp12e002 cp12e003 cp12e004 cp12e005 cp12e006 cp12e007 cp12e008 cp12e009
    cp13f001 cp13f002 cp13f003 cp13f004 cp13f005 cp13f006 cp13f007 cp13f008 cp13f009
    (999).

COMPUTE JOY1=MEAN(cp08a001,cp08a002,cp08a003).
COMPUTE JOY2=MEAN(cp09b001,cp09b002,cp09b003).
COMPUTE JOY3=MEAN(cp10c001,cp10c002,cp10c003).
COMPUTE JOY4=MEAN(cp11d001,cp11d002,cp11d003).
COMPUTE JOY5=MEAN(cp12e001,cp12e002,cp12e003).
COMPUTE JOY6=MEAN(cp13f001,cp13f002,cp13f003).

COMPUTE VAL1=MEAN(cp08a004,cp08a005,cp08a006).
COMPUTE VAL2=MEAN(cp09b004,cp09b005,cp09b006).
COMPUTE VAL3=MEAN(cp10c004,cp10c005,cp10c006).
COMPUTE VAL4=MEAN(cp11d004,cp11d005,cp11d006).
COMPUTE VAL5=MEAN(cp12e004,cp12e005,cp12e006).
COMPUTE VAL6=MEAN(cp13f004,cp13f005,cp13f006).

COMPUTE BUR1=MEAN(cp08a007,cp08a008,cp08a009).
COMPUTE BUR2=MEAN(cp09b007,cp09b008,cp09b009).
COMPUTE BUR3=MEAN(cp10c007,cp10c008,cp10c009).
COMPUTE BUR4=MEAN(cp11d007,cp11d008,cp11d009).
COMPUTE BUR5=MEAN(cp12e007,cp12e008,cp12e009).
COMPUTE BUR6=MEAN(cp13f007,cp13f008,cp13f009).
EXECUTE.

* Step 14.
* Matching datafiles: survey attitude plus households with backgroundvariables_firstentry.
* DataSet2 represents the survey attitude plus households file (see Dataset3).
* DataSet1 is the active file. Presumably it holds the background variables - verify against the preparation document.
* nohouse_encr and wave from DataSet2 are renamed to d0 and d1 and dropped, so the copies in the active file are the ones kept.
* source01 is 1 for every person present in DataSet2 and 0 otherwise.
* NOTE(review): MATCH FILES with BY expects both files sorted by nomem_encr, and no sort is visible in this step - confirm.
DATASET ACTIVATE DataSet1.
MATCH FILES
  /FILE=*
  /FILE='DataSet2'
  /RENAME (nohouse_encr wave = d0 d1)
  /IN=source01
  /BY nomem_encr
  /DROP=d0 d1.
VARIABLE LABELS source01 'Case source is DataSet2'.
EXECUTE.

* Keep only the cases with survey attitude data, that is the cases found in the survey attitude plus households file.
FILTER OFF.
USE ALL.
SELECT IF (source01 = 1).
EXECUTE.

* Recode missings: system-missing values on the identifier and background variables become 999.
RECODE
    nomem_encr wave nohouse_encr
    geslacht gebjaar sted brutoink_f oplcat werving herkomstgroep
    (SYSMIS=999).
EXECUTE.

* Step 16.
* Creating a linear transformation of the birthyear variable: age in 2007 divided by 10.
* The preceding Recode missings step writes 999 into gebjaar for unknown birth years.
* Without a guard, those cases would get the bogus age (2007 - 999) / 10 = 100.8.
* They are set to system-missing instead. Cases where gebjaar is already missing stay system-missing through the COMPUTE itself.
DATASET ACTIVATE DataSet1.
COMPUTE Age07div10 = (2007 - gebjaar) / 10.
IF (gebjaar = 999) Age07div10 = $SYSMIS.
EXECUTE.
